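! Precision demo: shows that a floating-point reduction is not associative.
! With 16 nodes and 16 ranks per node, the shuffled data and the random data
! differ only in their order, yet the reduced results differ noticeably. So,
! within one MPI implementation, a different reduction order gives a different
! result. Running on both ARM and x86 produced identical results for the random
! data, which indicates that across different MPI implementations - provided
! the reduction order is the same - only the Recursive algorithm keeps the
! results consistent.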
!> Demonstrates that a floating-point MPI_SUM reduction depends on the order
!! in which the operands are combined.
!!
!! Every rank contributes exactly ONE double-precision value to each of two
!! MPI_Allreduce calls:
!!   * "random" run  : rank r contributes values(mod(r, n) + 1);
!!   * "shuffed" run : the very same per-rank values, reassigned to the ranks
!!                     through a random permutation that rank 0 generates and
!!                     broadcasts to everybody.
!! Both reductions therefore add the same multiset of numbers; only the
!! rank-to-value assignment (and hence the combination order inside the
!! reduction algorithm) differs. Rank 0 prints both results as 64-bit
!! patterns so that last-bit differences are visible, followed by a
!! relative-tolerance verdict.
!!
!! MPI_Allreduce combines buffers element-wise ACROSS ranks; it never adds the
!! elements of one rank's buffer together. Shuffling an array on a single rank
!! and reducing the whole array would change the operands themselves instead
!! of their order, which is why each rank sends a scalar here.
program all_reduce_example
  implicit none
  include 'mpif.h'

  ! Kind of "double precision", i.e. the Fortran type MPI_DOUBLE_PRECISION describes.
  integer, parameter :: dp = kind(1.0d0)
  ! Number of tabulated sample values.
  integer, parameter :: n = 256
  ! Relative tolerance used for the final equal / not-equal verdict.
  real(dp), parameter :: rel_tol = 1.0e-12_dp

  ! Fixed sample values (kept constant so runs on different machines are
  ! comparable). Every literal carries the _dp suffix: without it the literal
  ! is a default-real constant and is rounded to single precision before being
  ! stored, which discards the low-order bits whose rounding this demo wants
  ! to exercise.
  real(dp), parameter :: values(n) = [ &
    0.7038086369959871_dp, 2.8251120218371284E-002_dp, &
    5.0046173844393138E-002_dp, 0.9789957831859226_dp, 0.3899646098248724_dp, &
    0.5580048927465384_dp, 0.2869520527156482_dp, 0.2856470017368906_dp, &
    0.5375782503396351_dp, 0.7270150266098625_dp, 0.7999334394302196_dp, &
    5.8484019322378344E-002_dp, 0.1530875603643835_dp, 0.2379960762633573_dp, &
    0.9308569877732396_dp, 0.3438403549637741_dp, 0.6866513745295322_dp, &
    0.8568961973603706_dp, 0.2662471964817286_dp, 0.9809031616176327_dp, &
    0.3228361381496967_dp, 7.6615984354404532E-002_dp, 0.4149010901069090_dp, &
    0.5531992491973767_dp, 0.2665501633545233_dp, 0.8604143884893318_dp, &
    0.8036310109642670_dp, 0.2148345295371286_dp, 0.6116832685197551_dp, &
    0.4196377237189068_dp, 9.8410464752689109E-002_dp, 0.7344879987375066_dp, &
    0.5586748845009026_dp, 0.2983346430492873_dp, 0.2765339210792774_dp, &
    0.3646576612344177_dp, 0.7153911603551393_dp, 0.8815110226505993_dp, &
    0.3749506274036918_dp, 0.6914350111861864_dp, 0.9178569104317944_dp, &
    0.9819413237096626_dp, 0.7419254111399312_dp, 0.1785816383679588_dp, &
    0.9062695407233150_dp, 0.5295401789515495_dp, 0.4015790474285694_dp, &
    0.8403358758926203_dp, 0.9130696371054654_dp, 0.4649444252242176_dp, &
    0.8278748220008367_dp, 0.6781129685078469_dp, 0.2049935371270379_dp, &
    0.6284607974606047_dp, 0.3464554478748170_dp, 0.2028254494045285_dp, &
    0.3695479796940333_dp, 0.1228504475588323_dp, 0.6104021211702673_dp, &
    8.8380859014748125E-002_dp, 0.3814070877724873_dp, 0.2758175204173483_dp, &
    0.6523906265103818_dp, 1.1981168598836689E-002_dp, 0.9287167349073684_dp, &
    0.2944767248779527_dp, 0.7407619456415659_dp, 0.4802654485112186_dp, &
    0.6900941371066835_dp, 0.1337102720344063_dp, 0.9229375223385574_dp, &
    8.7217393516382913E-002_dp, 0.6830908979157471_dp, 5.9642116800716849E-002_dp, &
    0.2565607195932387_dp, 0.5333396435088247_dp, 0.1755982525311310_dp, &
    6.4497985688234394E-002_dp, 0.3354596372180652_dp, 0.9089513461036205_dp, &
    0.5453208121076614_dp, 0.1043149874384994_dp, 0.3589747105661871_dp, &
    7.6221582859631098E-002_dp, 0.3892167946148390_dp, 0.2354149492143449_dp, &
    0.2380252594729058_dp, 0.2819122329047445_dp, 0.1634389763760140_dp, &
    7.2307692530586110E-002_dp, 0.2950570660150618_dp, 0.4945859790661444_dp, &
    0.8152518764135692_dp, 0.3390372289071450_dp, 0.1368056782188205_dp, &
    0.6305167032331269_dp, 0.4035373251697649_dp, 0.3605726885212306_dp, &
    0.4433522163456445_dp, 0.4957803887850076_dp, 0.7067382860927580_dp, &
    0.7927541197846040_dp, 0.5959876377355755_dp, 0.6813774758185502_dp, &
    0.7776926216897522_dp, 0.8701772624687720_dp, 0.8650618123151901_dp, &
    0.8910447037506373_dp, 0.1759634548846947_dp, 0.5929444981033214_dp, &
    0.2092144913759171_dp, 1.8674905340105852E-003_dp, 0.5215614069837642_dp, &
    0.5795007800544596_dp, 0.9535171866245520_dp, 0.6525667077215616_dp, &
    0.4976478793190182_dp, 0.2282996930765222_dp, 0.3722548998390636_dp, &
    0.5495048243601275_dp, 0.3339441835401118_dp, 0.2753405010087704_dp, &
    9.8476955545294231E-002_dp, 0.2373167121542537_dp, 0.4405495281107648_dp, &
    0.5099076384248065_dp, 0.8682849991120918_dp, 0.3076914469212113_dp, &
    0.2391842026882642_dp, 0.9621109350945289_dp, 8.9408418479266061E-002_dp, &
    0.8218021857366438_dp, 0.9602581546427729_dp, 0.7368320820072825_dp, &
    0.1904106281710511_dp, 0.4616633183183296_dp, 0.3713070100967713_dp, &
    0.2942023381828847_dp, 1.2172583016052840E-002_dp, 0.2888875837163454_dp, &
    0.6989800304725833_dp, 0.8118565382075360_dp, 0.8041099766076911_dp, &
    0.8804575821281446_dp, 0.5965790306375567_dp, 0.9381642331608475_dp, &
    0.7739674733020649_dp, 0.8935183950869572_dp, 0.7022597678647884_dp, &
    0.5568371852803295_dp, 0.6749963151681300_dp, 0.9643781014731161_dp, &
    0.3551817134052868_dp, 7.3566777961559637E-002_dp, 0.8510395234632142_dp, &
    0.6871688981841828_dp, 0.2532656851894615_dp, 5.4161743877870094E-002_dp, &
    0.8854233161690956_dp, 0.6551495000709053_dp, 0.5676264803123274_dp, &
    0.8498447158270181_dp, 0.9923259770387176_dp, 0.6593907894711606_dp, &
    0.5486678951578625_dp, 0.2698862481771158_dp, 0.4066819011073477_dp, &
    0.6673222922068476_dp, 0.6237688909442767_dp, 0.9038496085631493_dp, &
    0.3434530261386755_dp, 0.2577214245705619_dp, 0.3544911903910304_dp, &
    0.8770345761337381_dp, 0.9580113524410194_dp, 0.2288763423077711_dp, &
    0.9128709246414672_dp, 0.9221176707033578_dp, 0.7268792919607563_dp, &
    0.9503373294797370_dp, 0.8882671317789317_dp, 0.4615388197993298_dp, &
    0.1920039188804736_dp, 0.1335611930681040_dp, 0.6176596216865846_dp, &
    0.5120360227232084_dp, 0.3653884283624791_dp, 0.5354569450191491_dp, &
    0.3912826176386659_dp, 0.9721508120776150_dp, 0.3890705988569465_dp, &
    0.3233997808034985_dp, 0.7643332873269202_dp, 0.3041535422801331_dp, &
    0.8942684827809728_dp, 0.1159498908177028_dp, 0.2737371102832356_dp, &
    0.6526004191058519_dp, 0.7656923620794629_dp, 8.6272401661446452E-002_dp, &
    0.2495110838858068_dp, 0.8913967319698202_dp, 0.1646364418290602_dp, &
    0.1310807904419420_dp, 0.6217293466805955_dp, 0.6407937015244727_dp, &
    0.8635475440474352_dp, 0.5537070406860067_dp, 0.4544805712454405_dp, &
    0.3860626340075157_dp, 0.9449472438046058_dp, 0.7578160268284080_dp, &
    0.6696569315037095_dp, 0.7282176815286761_dp, 3.8663053113367596E-002_dp, &
    0.7106396058840687_dp, 0.8440884284898544_dp, 0.9191680153895163_dp, &
    0.6196144134984962_dp, 0.2032994949424278_dp, 0.8417203963260107_dp, &
    0.4658177751704500_dp, 0.5599617169139890_dp, 0.4831619575459314_dp, &
    0.7570065356284346_dp, 0.2962009675714512_dp, 0.8518804091779657_dp, &
    0.5049089607185948_dp, 0.2409779843743394_dp, 0.4266634671321441_dp, &
    2.4418649100127254E-002_dp, 0.8905434622913333_dp, 0.2155485666026635_dp, &
    8.5066412864193808E-002_dp, 0.3458314825216604_dp, 0.6440330625986235_dp, &
    9.3842957233761126E-002_dp, 5.7268962928674227E-002_dp, 0.5508841880346438_dp, &
    0.9057931994356494_dp, 0.1271950201445549_dp, 0.8508494928621957_dp, &
    0.3534699305001254_dp, 0.4027645972126095_dp, 0.4107021601542442_dp, &
    0.3681730045188942_dp, 0.2775129599943398_dp, 0.3778885796002527_dp, &
    0.2933080595039428_dp, 0.6262507267569077_dp, 0.4532394173830880_dp, &
    0.6233444425160002_dp, 2.1921642198876157E-002_dp, 0.3871510167377039_dp, &
    0.6835196896855820_dp, 4.1236054177318238E-003_dp]

  integer :: ierr, myrank, numprocs, i, j, swap
  ! perm(r + 1) is the 1-based slot whose value rank r contributes in the
  ! shuffled run; it is always a permutation of 1..numprocs.
  integer, allocatable :: perm(:)
  real(dp) :: contrib, rcontrib, total, rtotal, u

  call MPI_INIT(ierr)
  call MPI_COMM_RANK(MPI_COMM_WORLD, myrank, ierr)
  call MPI_COMM_SIZE(MPI_COMM_WORLD, numprocs, ierr)

  ! Start from the identity assignment on every rank.
  allocate(perm(numprocs))
  perm = [(i, i = 1, numprocs)]

  if (myrank == 0) then
    ! Slot k maps to values(mod(k - 1, n) + 1), so runs with more than n ranks
    ! simply reuse the table cyclically.
    write(*,*) 'random array: ', (values(mod(i - 1, n) + 1), i = 1, numprocs)

    ! Fisher-Yates shuffle: j is drawn from 1..i inclusive so that every
    ! permutation is reachable. The min() guards against i*u rounding up to i.
    call random_seed()
    do i = numprocs, 2, -1
      call random_number(u)
      j = min(i, 1 + int(real(i, dp) * u))
      swap = perm(i)
      perm(i) = perm(j)
      perm(j) = swap
    end do

    write(*,*) 'Shuffled array: ', (values(mod(perm(i) - 1, n) + 1), i = 1, numprocs)
  end if

  ! All ranks must agree on the permutation, otherwise the shuffled run would
  ! not add the same set of numbers as the unshuffled one.
  call MPI_Bcast(perm, numprocs, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)

  rcontrib = values(mod(myrank, n) + 1)
  contrib = values(mod(perm(myrank + 1) - 1, n) + 1)

  call MPI_Allreduce(contrib, total, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr)
  call MPI_Allreduce(rcontrib, rtotal, 1, MPI_DOUBLE_PRECISION, MPI_SUM, MPI_COMM_WORLD, ierr)

  if (myrank == 0) then
    ! 'a' consumes the label, b64.64 dumps the real as 64 binary digits so a
    ! difference in the last bits can be seen directly.
    write(*, '(a, b64.64)') 'The sum of all shuffed data is ', total
    write(*, '(a, b64.64)') 'The sum of all random data is ', rtotal

    ! Verdict by relative tolerance; bit-level differences far below rel_tol
    ! still count as equal here, so consult the bit patterns above for those.
    if (abs(total - rtotal) < rel_tol * max(abs(total), abs(rtotal))) then
      write(*,*) '相等'
    else
      write(*,*) '不相等'
    end if
  end if

  deallocate(perm)
  call MPI_FINALIZE(ierr)
end program all_reduce_example
!Intel MPI:  mpif90 -o all_reduce_example allreduce-exp.F90 && mpiexec -n 10 -genv I_MPI_ADJUST_ALLREDUCE=1 ./all_reduce_example
!HMPI:  mpif90 -o all_reduce_example allreduce-exp.F90 && mpirun -n 10 -x UCX_BUILTIN_ALLREDUCE_ALGORITHM=1 ./all_reduce_example